
# ------------------------------------------------------------------- #
# Replication script for: 
# "How Americans Evaluate Redistributive vs. Symbolic Racial Justice Policies" 
# Roxanne Rahnama and Mark Williamson 
# ------------------------------------------------------------------- #

# Preliminaries -----
require(dplyr); require(ggplot2); require(estimatr); require(margins);
require(stargazer); require(tidyr); require(nnet); require(quanteda); 
require(cowplot); require(quanteda.textplots); require(quanteda.textstats); 
library(ggtext)

main.theme <- theme_classic() + 
  theme(legend.key = element_rect(fill = NA, color = NA), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        text = element_text(family = "Helvetica", size = 16))
theme_set(main.theme)

# Clean data
suppressMessages(suppressWarnings(source("./clean_data.R")))

# Read in clean data 
df <- read.csv("./clean_data/clean_survey_data.csv")


# Create experimental data frame with imputed values for missing values on pre-treatment

# Summarise missingness by variable 
apply(df[,c("age", "region", "man", "race", "race_id_import", "left_right", "pol_interest", 
            "pol_know", "party_id", "usa_pride", "rr", "educ", "hh_income")], MARGIN = 2,
      FUN = function(x) {mean(is.na(x))}) * 100

# Write a mode function for categorical 
mode_fun <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# Calculate block-specific mean/mode among non-missing obs. 
block_cov <- df %>%
  group_by(party_id) %>%
  summarise(age = mean(age, na.rm = TRUE),
            region = mode_fun(region),
            man = mode_fun(man),
            race = mode_fun(race),
            race_id_import = mean(race_id_import, na.rm = TRUE),
            left_right = mean(left_right, na.rm = TRUE),
            pol_interest = mean(pol_interest, na.rm = TRUE),
            pol_know = mean(pol_know, na.rm = TRUE),
            usa_pride = mean(usa_pride, na.rm = TRUE),
            rr = mean(rr, na.rm = TRUE),
            educ = mean(educ, na.rm = TRUE),
            hh_income = mean(hh_income, na.rm = TRUE))

# Impute missing values for variables 
exp_df <- df
for (i in 1:nrow(exp_df)) {
  exp_df$age[i] <- ifelse(is.na(exp_df$age[i]), block_cov$age[block_cov$party_id==exp_df$party_id[i]], exp_df$age[i])
  exp_df$region[i] <- ifelse(is.na(exp_df$region[i]), block_cov$region[block_cov$party_id==exp_df$party_id[i]], exp_df$region[i])
  exp_df$man[i] <- ifelse(is.na(exp_df$man[i]), block_cov$man[block_cov$party_id==exp_df$party_id[i]], exp_df$man[i])
  exp_df$race[i] <- ifelse(is.na(exp_df$race[i]), block_cov$race[block_cov$party_id==exp_df$party_id[i]], exp_df$race[i])
  exp_df$race_id_import[i] <- ifelse(is.na(exp_df$race_id_import[i]), block_cov$race_id_import[block_cov$party_id==exp_df$party_id[i]], exp_df$race_id_import[i])
  exp_df$left_right[i] <- ifelse(is.na(exp_df$left_right[i]), block_cov$left_right[block_cov$party_id==exp_df$party_id[i]], exp_df$left_right[i])
  exp_df$pol_interest[i] <- ifelse(is.na(exp_df$pol_interest[i]), block_cov$pol_interest[block_cov$party_id==exp_df$party_id[i]], exp_df$pol_interest[i])
  exp_df$pol_know[i] <- ifelse(is.na(exp_df$pol_know[i]), block_cov$pol_know[block_cov$party_id==exp_df$party_id[i]], exp_df$pol_know[i])
  exp_df$usa_pride[i] <- ifelse(is.na(exp_df$usa_pride[i]), block_cov$usa_pride[block_cov$party_id==exp_df$party_id[i]], exp_df$usa_pride[i])
  exp_df$rr[i] <- ifelse(is.na(exp_df$rr[i]), block_cov$rr[block_cov$party_id==exp_df$party_id[i]], exp_df$rr[i])
  exp_df$educ[i] <- ifelse(is.na(exp_df$educ[i]), block_cov$educ[block_cov$party_id==exp_df$party_id[i]], exp_df$educ[i])
  exp_df$hh_income[i] <- ifelse(is.na(exp_df$hh_income[i]), block_cov$hh_income[block_cov$party_id==exp_df$party_id[i]], exp_df$hh_income[i])
}

# Confirm no missingness 
apply(exp_df[,c("age", "region", "man", "race", "race_id_import", "left_right", "pol_interest", 
                "pol_know", "party_id", "usa_pride", "rr", "educ", "hh_income")], MARGIN = 2,
      FUN = function(x) {mean(is.na(x))})

# Re-standardize continuous variables 
exp_df$age_z <- (exp_df$age - mean(exp_df$age, na.rm = TRUE)) / sd(exp_df$age, na.rm = TRUE)
exp_df$race_id_import_z <- (exp_df$race_id_import - mean(exp_df$race_id_import, na.rm = TRUE)) / sd(exp_df$race_id_import, na.rm = TRUE)
exp_df$left_right_z <- (exp_df$left_right - mean(exp_df$left_right, na.rm = TRUE)) / sd(exp_df$left_right, na.rm = TRUE)
exp_df$pol_interest_z <- (exp_df$pol_interest - mean(exp_df$pol_interest, na.rm = TRUE)) / sd(exp_df$pol_interest, na.rm = TRUE)
exp_df$pol_know_z <- (exp_df$pol_know - mean(exp_df$pol_know, na.rm = TRUE)) / sd(exp_df$pol_know, na.rm = TRUE)
exp_df$usa_pride_z <- (exp_df$usa_pride - mean(exp_df$usa_pride, na.rm = TRUE)) / sd(exp_df$usa_pride, na.rm = TRUE)
exp_df$rr_z <- (exp_df$rr - mean(exp_df$rr, na.rm = TRUE)) / sd(exp_df$rr, na.rm = TRUE)
exp_df$educ_z <- (exp_df$educ - mean(exp_df$educ, na.rm = TRUE)) / sd(exp_df$educ, na.rm = TRUE)
exp_df$hh_income_z <- (exp_df$hh_income - mean(exp_df$hh_income, na.rm = TRUE)) / sd(exp_df$hh_income, na.rm = TRUE)


# MAIN TEXT ----------------------------------------------

## TABLE 1 ----------------------------------------

# Mean and s.d. 
df %>% 
  filter(treated==0) %>%
  summarise(symbol1avg = mean(symbol1, na.rm = TRUE),
            symbol1sd = sd(symbol1, na.rm = TRUE),
            symbol1n = sum(!is.na(symbol1)),
            symbol2avg = mean(symbol2, na.rm = TRUE),
            symbol2sd = sd(symbol2, na.rm = TRUE),
            symbol2n = sum(!is.na(symbol2)),
            symbol3avg = mean(symbol3, na.rm = TRUE),
            symbol3sd = sd(symbol3, na.rm = TRUE),
            symbol3n = sum(!is.na(symbol3)),
            redist1avg = mean(redist1, na.rm = TRUE),
            redist1sd = sd(redist1, na.rm = TRUE),
            redist1n = sum(!is.na(redist1)),
            redist2avg = mean(redist2, na.rm = TRUE),
            redist2sd = sd(redist2, na.rm = TRUE),
            redist2n = sum(!is.na(redist2)),
            redist3avg = mean(redist3, na.rm = TRUE),
            redist3sd = sd(redist3, na.rm = TRUE),
            redist3n = sum(!is.na(redist3))
  )

# average ranking / proportion of people who ranked this issue 
# the highest of the 3 in-category, median, proportion of answers above 7 / below 3, etc.
df %>%
  filter(treated==0) %>%
  summarise(mean(symbol1<3, na.rm = TRUE),
            mean(symbol2<3, na.rm = TRUE),
            mean(symbol3<3, na.rm = TRUE),
            mean(redist1<3, na.rm = TRUE),
            mean(redist2<3, na.rm = TRUE),
            mean(redist3<3, na.rm = TRUE),
            mean(symbol1>7, na.rm = TRUE),
            mean(symbol2>7, na.rm = TRUE),
            mean(symbol3>7, na.rm = TRUE),
            mean(redist1>7, na.rm = TRUE),
            mean(redist2>7, na.rm = TRUE),
            mean(redist3>7, na.rm = TRUE)) %>%
  round(2)


# Prop ranking # 1 and #3 within category 
df %>%
  filter(treated==0) %>%
  mutate(symbol1_1 = as.integer(symbol1 > symbol2 & symbol1 > symbol3),
         symbol2_1 = as.integer(symbol2 > symbol1 & symbol2 > symbol3),
         symbol3_1 = as.integer(symbol3 > symbol2 & symbol3 > symbol1),
         symbol1_3 = as.integer(symbol1 < symbol2 & symbol1 < symbol3),
         symbol2_3 = as.integer(symbol2 < symbol1 & symbol2 < symbol3),
         symbol3_3 = as.integer(symbol3 < symbol2 & symbol3 < symbol1)) %>%
  summarise(symbol1_1 = mean(symbol1_1, na.rm = TRUE),
            symbol2_1 = mean(symbol2_1, na.rm = TRUE),
            symbol3_1 = mean(symbol3_1, na.rm = TRUE),
            symbol1_3 = mean(symbol1_3, na.rm = TRUE),
            symbol2_3 = mean(symbol2_3, na.rm = TRUE),
            symbol3_3 = mean(symbol3_3, na.rm = TRUE))

df %>%
  filter(treated==0) %>%
  mutate(redist1_1 = as.integer(redist1 > redist2 & redist1 > redist3),
         redist2_1 = as.integer(redist2 > redist1 & redist2 > redist3),
         redist3_1 = as.integer(redist3 > redist2 & redist3 > redist1),
         redist1_3 = as.integer(redist1 < redist2 & redist1 < redist3),
         redist2_3 = as.integer(redist2 < redist1 & redist2 < redist3),
         redist3_3 = as.integer(redist3 < redist2 & redist3 < redist1)) %>%
  summarise(redist1_1 = mean(redist1_1, na.rm = TRUE),
            redist2_1 = mean(redist2_1, na.rm = TRUE),
            redist3_1 = mean(redist3_1, na.rm = TRUE),
            redist1_3 = mean(redist1_3, na.rm = TRUE),
            redist2_3 = mean(redist2_3, na.rm = TRUE),
            redist3_3 = mean(redist3_3, na.rm = TRUE))


## FIGURE 1 -----------------------------------------------------------------

# Overall correlation across policy types 
cor(df$redist_idx_n[df$treated==0], df$symbol_idx_n[df$treated==0], 
    use="complete.obs")

# Party ID distribution 
table(df$party_id[df$treated==0])

# Summarize data points
plot_df <- df %>%
  filter(treated==0) %>%
  group_by(symbol_idx_n, redist_idx_n, party_id) %>%
  summarise(n=n()) %>%
  mutate(party_id = case_when(party_id=="Democrat" ~ "Party ID: Democrat\n(n=187)",
                              party_id=="Independent" ~ "Party ID: Independent\n(n=178)",
                              party_id=="Republican" ~ "Party ID: Republican\n(n=151)"))

# Party ID 
p1 <- ggplot(plot_df) + 
  facet_wrap(~party_id) + 
  geom_abline(slope=1,intercept=0, lty="dashed", col="gray50", size=0.5) + 
  geom_point(aes(symbol_idx_n, redist_idx_n, size=n),
             alpha=0.2) + 
  geom_smooth(data=df %>% filter(treated==0) %>% 
                mutate(party_id = case_when(party_id=="Democrat" ~ "Party ID: Democrat\n(n=187)",
                                            party_id=="Independent" ~ "Party ID: Independent\n(n=178)",
                                            party_id=="Republican" ~ "Party ID: Republican\n(n=151)")), 
              aes(symbol_idx_n, redist_idx_n),
              color="black",
              alpha=0.2, method="lm", se=T, size=0.8) + 
  labs(x="\nSupport for\nsymbolic policies", y="Support for\nredistributive\npolicies") + 
  guides(size="none", col="none", fill="none") + 
  scale_x_continuous(breaks=c(0,5,10), 
                     labels=c("Strongly\noppose\n(0)", "Neither support\nnor oppose\n(5)", 
                              "Strongly\nsupport\n(10)"), expand = c(0.1, 0.1)) + 
  scale_y_continuous(breaks=c(0,5,10), 
                     labels=c("Strongly\noppose\n(0)", "Neither support\nnor oppose\n(5)", 
                              "Strongly\nsupport\n(10)"), expand = c(0.1, 0.1)) +
  theme(axis.title.y = element_text(angle=0, vjust=0.5, size=12, color = "white"),
        axis.title.x = element_text(size=12, color = "white"),
        axis.text.x = element_text(size=6),
        axis.text.y = element_text(size=6),
        strip.text = element_text(size=7),
        panel.spacing = unit(1, "lines"),
        plot.margin = unit(c(0, 1, 0,0), "cm"))

# Race distribution 
table(df$race[df$treated==0])

plot_df <- df %>%
  filter(treated==0) %>%
  group_by(symbol_idx_n, redist_idx_n, race) %>%
  summarise(n=n()) %>%
  mutate(race = case_when(race=="White" ~ "Race: White alone\n(n=372)",
                          race=="Black" ~ "Race: Black alone\n(n=77)",
                          race=="Other" ~ "Race: Other/Multi-racial\n(n=67)"),
         race = factor(race,
                       levels=c("Race: White alone\n(n=372)",
                                "Race: Black alone\n(n=77)",
                                "Race: Other/Multi-racial\n(n=67)"),
                       ordered = TRUE))

p2 <- ggplot(plot_df) + 
  facet_wrap(~race) + 
  geom_abline(slope=1,intercept=0, lty="dashed", col="gray50", size=0.5) + 
  geom_point(aes(symbol_idx_n, redist_idx_n, size=n),
             alpha=0.2) + 
  geom_smooth(data=df %>% filter(treated==0) %>%
                mutate(race = case_when(race=="White" ~ "Race: White alone\n(n=372)",
                                        race=="Black" ~ "Race: Black alone\n(n=77)",
                                        race=="Other" ~ "Race: Other/Multi-racial\n(n=67)"),
                       race = factor(race,
                                     levels=c("Race: White alone\n(n=372)",
                                              "Race: Black alone\n(n=77)",
                                              "Race: Other/Multi-racial\n(n=67)"),
                                     ordered = TRUE)), 
              aes(symbol_idx_n, redist_idx_n), 
              col="black",
              alpha=0.2, method="lm", se=T, size=0.8) + 
  labs(x="\nSupport for\nsymbolic policies", y="Support for\nredistributive\npolicies") + 
  guides(size="none", col="none", fill="none") + 
  scale_x_continuous(breaks=c(0,5,10), 
                     labels=c("Strongly\noppose\n(0)", "Neither support\nnor oppose\n(5)", 
                              "Strongly\nsupport\n(10)"), expand = c(0.1, 0.1)) + 
  scale_y_continuous(breaks=c(0,5,10), 
                     labels=c("Strongly\noppose\n(0)", "Neither support\nnor oppose\n(5)", 
                              "Strongly\nsupport\n(10)"), expand = c(0.1, 0.1)) +
  theme(axis.title.y = element_text(angle=0, vjust=0.5, size=12),
        axis.title.x = element_text(size=12, color = "white"),
        axis.text.x = element_text(size=6),
        axis.text.y = element_text(size=6),
        strip.text = element_text(size=7),
        panel.spacing = unit(1, "lines"),
        plot.margin = unit(c(0, 1, 0,0), "cm"))


# RR distribution 
table(df$rr_cat[df$treated==0])

plot_df <- df %>%
  filter(treated==0) %>%
  group_by(symbol_idx_n, redist_idx_n, rr_cat) %>%
  summarise(n=n()) %>%
  mutate(rr_cat = case_when(rr_cat=="Low" ~ "Racial Resentment: Low\n(n=109)",
                            rr_cat=="Medium" ~ "Racial Resentment: Medium\n(n=293)",
                            rr_cat=="High" ~ "Racial Resentment: High\n(n=114)"),
         rr_cat = factor(rr_cat, levels=c("Racial Resentment: Low\n(n=109)",
                                          "Racial Resentment: Medium\n(n=293)",
                                          "Racial Resentment: High\n(n=114)"),
                         ordered = TRUE))

p3 <- ggplot(plot_df) + 
  facet_wrap(~rr_cat) + 
  geom_abline(slope=1,intercept=0, lty="dashed", col="gray50", size=0.5) + 
  geom_point(aes(symbol_idx_n, redist_idx_n, size=n),
             alpha=0.2) + 
  geom_smooth(data=df %>% filter(treated==0) %>%
                mutate(rr_cat = case_when(rr_cat=="Low" ~ "Racial Resentment: Low\n(n=109)",
                                          rr_cat=="Medium" ~ "Racial Resentment: Medium\n(n=293)",
                                          rr_cat=="High" ~ "Racial Resentment: High\n(n=114)"),
                       rr_cat = factor(rr_cat, levels=c("Racial Resentment: Low\n(n=109)",
                                                        "Racial Resentment: Medium\n(n=293)",
                                                        "Racial Resentment: High\n(n=114)"),
                                       ordered = TRUE)), 
              aes(symbol_idx_n, redist_idx_n), 
              alpha=0.2, col="black",
              method="lm", se=T, size=0.8) + 
  labs(x="\nSupport for\nsymbolic policies", y="Support for\nredistributive\npolicies") + 
  guides(size="none", col="none", fill="none") + 
  scale_x_continuous(breaks=c(0,5,10), 
                     labels=c("Strongly\noppose\n(0)", "Neither support\nnor oppose\n(5)", 
                              "Strongly\nsupport\n(10)"), expand = c(0.1, 0.1)) + 
  scale_y_continuous(breaks=c(0,5,10), 
                     labels=c("Strongly\noppose\n(0)", "Neither support\nnor oppose\n(5)", 
                              "Strongly\nsupport\n(10)"), expand = c(0.1, 0.1)) + 
  theme(axis.title.y = element_text(angle=0, vjust=0.5, color = "white", size=12),
        axis.title.x = element_text(size=12),
        axis.text.x = element_text(size=6),
        axis.text.y = element_text(size=6),
        strip.text = element_text(size=7),
        panel.spacing = unit(1, "lines"),
        plot.margin = unit(c(0, 1, 0,0), "cm"))

# Combine plots
plot_grid(p1, p2, p3, rows = 3)


# Percentage of respondents that favour one but not the other
mean((df$redist_idx_n[df$treated==0] > 5 & df$symbol_idx_n[df$treated==0] < 5) | 
       (df$redist_idx_n[df$treated==0] < 5 & df$symbol_idx_n[df$treated==0] > 5), na.rm = TRUE)



## FIGURE 2 ---------------------------------------------------------------------------

# Distribution of moderator
table(df$mech_progress[df$treated==0])

plot_df <- df %>% 
  filter(treated==0 & !is.na(mech_progress)) %>%
  mutate(mech_progress = ifelse(mech_progress <2,
                                "Still more to do\non racial justice\n(n=349)",
                                "Enough/too much has been\ndone on racial justice\n(n=166)")) %>%
  group_by(mech_progress, policy_pref_forced) %>%
  summarise(n=n()) %>%
  group_by(mech_progress) %>%
  mutate(pct = n/sum(n))

ggplot(plot_df) + 
  facet_wrap(~mech_progress) + 
  geom_bar(aes(policy_pref_forced, pct, fill=mech_progress), 
           alpha=0.8, stat="identity") + 
  geom_text(aes(policy_pref_forced, pct, label = paste0(round(pct*100), "%")), 
            vjust = -0.5) +
  guides(fill="none") +
  theme(axis.title.y = element_text(angle=0, vjust=0.5),
        plot.title = element_text(hjust = 0.5)) + 
  scale_y_continuous(labels=scales::percent, limits = c(0, 0.8), 
                     breaks=seq(0,1,0.25)) + 
  scale_fill_manual(values=c("gray60", "gray20")) + 
  labs(x="\nPreferred policy approach\nif you could only choose one", 
       y = "Percentage\nof respondents") + 
  theme(panel.spacing = unit(1, "lines"),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size=8),
        axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12),
        axis.ticks.y = element_blank(), 
        strip.text = element_text(size=10))

## FIGURE 3 ---------------------------------------------------------------------------

plot_df <- df %>% 
  filter(treated==0 & !is.na(mech_progress)) %>%
  mutate(mech_progress = ifelse(mech_progress <2,
                                "Still more to do\non racial justice\n(n=349)",
                                "Enough/too much has been\ndone on racial justice\n(n=166)")) %>%
  group_by(mech_progress, mech_symbols_distract) %>%
  summarise(n=n()) %>%
  group_by(mech_progress) %>%
  mutate(pct = n/sum(n))

ggplot(plot_df) + 
  facet_wrap(~mech_progress) + 
  geom_bar(aes(mech_symbols_distract, pct, fill=mech_progress), 
           alpha=0.8, stat="identity") + 
  geom_text(aes(mech_symbols_distract, pct, label = paste0(round(pct*100), "%")), 
            vjust = -0.5) +
  guides(fill="none") +
  theme(axis.title.y = element_text(angle=0, vjust=0.5),
        plot.title = element_text(hjust = 0.5)) + 
  scale_y_continuous(labels=scales::percent, limits = c(0, 0.4), 
                     breaks=seq(0,1,0.1)) + 
  scale_x_continuous(breaks=1:5,
                     labels=c("Strongly\ndisagree", "", 
                              "Neither agree\nnor disagree",
                              "", "Strongly\nagree")) + 
  scale_fill_manual(values=c("gray60", "gray20")) + 
  labs(x="\nStatue removals distract from\nlarger racial justice problems", 
       y = "Percentage\nof respondents") + 
  theme(panel.spacing = unit(1, "lines"),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size=8),
        axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12),
        axis.ticks.y = element_blank(), 
        strip.text = element_text(size=12))


## TABLE 2 ---------------------------------------------------------------------------

# Define covariate conditioning set 
cov_set <- "treated + age_z + region + man + race*race_id_import_z + 
                    left_right_z + pol_interest_z + pol_know_z + 
                    party_id + usa_pride_z + rr_z + educ_z + hh_income_z"

# Estimate models 
mod1 <- lm_robust(as.formula(paste("symbol_idx_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod1b <- lm(as.formula(paste("symbol_idx_z", cov_set, sep="~")), 
            data=exp_df)

mod2 <- lm_robust(as.formula(paste("redist_idx_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod2b <- lm(as.formula(paste("redist_idx_z", cov_set, sep="~")), 
            data=exp_df)

# Main text
stargazer(mod1b, mod2b,
          se=c(starprep(mod1), starprep(mod2)),
          #    type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))


## FIGURE 4 ----------------------------------------------------------

# Join to main data frame
txt_df <- df %>% 
  # Keep only treated respondents
  filter(treated==1) %>%
  # Remove those without single opinion (n=6) or indifferent (n=82)
  filter(!(txt_support==1 & txt_oppose==1) & 
           !(txt_support==0 & txt_oppose==0) & 
           !is.na(Treatment.open.ended)) 

# Create DFM 
dfm1 <- txt_df %>%
  corpus(text_field = "Treatment.open.ended", 
         docid_field = "ResponseId") %>%
  tokens(remove_punct = TRUE, 
         remove_numbers = TRUE, 
         include_docvars = TRUE) %>%
  tokens_remove(c(stopwords("English"))) %>%
  dfm() %>% 
  # Lemmatize 
  dfm_replace(lexicon::hash_lemmas$token, replacement = lexicon::hash_lemmas$lemma) 

# Conduct keyness analysis
key_top <- textstat_keyness(dfm1, target = dfm1@docvars$txt_oppose==1) %>% 
  group_by(chi2>0) %>%
  slice_max(abs(chi2), n = 20) %>% 
  mutate(feature = reorder(feature, chi2))

ggplot() +
  geom_col(data=key_top, aes(x = chi2, y = feature, fill = chi2)) +
  scale_fill_gradient2(
    low = "steelblue",   
    mid = "white",       
    high = "firebrick",  
    midpoint = 0
  ) + 
  geom_text(data=key_top,
            aes(y = feature, label = feature, 
                x = ifelse(chi2 > 0, chi2 + 1.25, chi2 - 1.25)),  
            hjust = ifelse(key_top$chi2 > 0, 0, 1),  
            size = 3,
            color = "black"
  ) +
  geom_richtext(data=data.frame(), aes(22, y=45, label="More used by<br>respondents <b>opposed</b><br>to removals"),
                lineheight = 0.8, size=3.5,
                fill = NA, label.color = NA, 
  ) + 
  geom_richtext(data=data.frame(), aes(-22, y=45, label="More used by<br>respondents <b>supportive</b><br>of removals"),
                lineheight = 0.8, size=3.5,
                fill = NA, label.color = NA 
  ) + 
  guides(fill="none") +
  labs(x="\nRelative word usage\n(χ² statistic)", y="") +
  scale_x_continuous(breaks=seq(-25,50,25)) + 
  coord_cartesian(ylim=c(0, 48),
                  xlim=c(-30,52)) + 
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        text = element_text(family = "Helvetica", size = 18),
        axis.line.y = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(size=10),
        axis.ticks.y = element_blank(),
        axis.title.x = element_text(size=12)
  )


# APPENDICES ----------------------------------------------

## TABLE A1/A2 ------------------------------------------------

# Helper functions
recode_eval <- function(x) {
  x <- case_when(x=="" ~ NA,
                 x=="Very well" ~ 2,
                 x=="Somewhere in between" ~ 1,
                 x=="Not well at all" ~ 0)
  return(x)
}
get_mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# Pretest data 
p_df <- read.csv("./clean_data/clean_pretest_data.csv")

# Recode main measures to numeric 
for (i in 29:130) {
  p_df[,i] <- recode_eval(p_df[,i])
}

# Convert to long format 
p_df <- p_df %>%
  pivot_longer(cols = names(p_df[,29:130])) %>%
  mutate(policy_num = substr(name, 2,3),
         rating_type = case_when(substr(name, 5,5)=="1" ~ "Non-racial",
                                 substr(name, 5,5)=="2" ~ "Symbolic",
                                 substr(name, 5,5)=="3" ~ "Redistributive")
  )

# Policy names 
pol_names <- read.csv("./raw_data/pretest_data.csv", stringsAsFactors = FALSE)[1,] %>%
  pivot_longer(cols = names(.[,29:130])) %>%
  select(name, value) %>%
  rowwise() %>%
  mutate(policy_num = substr(name, 2,3),
         policy = substr(value, 8, nchar(value)), 
         policy = trimws(strsplit(policy, "\n")[[1]][1])) %>%
  select(policy_num, policy) %>%
  distinct() %>%
  mutate(policy_type = case_when(policy_num %in% c(17, 57, 59, 61, 63, 65, 67, 69, 71, 73) ~
                                   "Symbolic",
                                 policy_num %in% c(51, 53, 55) ~
                                   "Symbolic backlash",
                                 policy_num %in% c(20, 22, 75, 77, 79, 81, 83, 85, 87) ~ 
                                   "Redistributive",
                                 policy_num %in% c(45, 47, 49) ~
                                   "Redistributive backlash",
                                 policy_num %in% c(25, 27, 29, 31, 33, 37, 39, 41, 43) ~
                                   "Non-racial"
  ))

# Summarize by policy (distinctiveness, modal type)
plot_df <- p_df %>%
  group_by(ResponseId, policy_num, rating_type) %>%
  summarise(score = mean(value, na.rm = TRUE)) %>%
  filter(!is.nan(score)) %>%
  group_by(ResponseId, policy_num) %>%
  summarise(voted_type = ifelse(length(score[score==max(score)])==1, 
                                rating_type[score==max(score)], "Indeterminant"),
            distinctiveness = max(score) / sum(score),
            score_avg = mean(score==2)
  ) %>%
  group_by(policy_num) %>%
  summarise(n_raters = length(unique(ResponseId)), 
            modal_cat = get_mode(voted_type), 
            distinctiveness = mean(distinctiveness, na.rm = TRUE)
  ) %>% 
  # Attach policy names
  left_join(pol_names, by = "policy_num") 

# Average score within intended category 
plot_df <- p_df %>%
  group_by(policy_num, rating_type) %>%
  summarise(score = mean(value, na.rm = TRUE)) %>%
  left_join(pol_names, by = "policy_num")  %>%
  mutate(score = round(scales::rescale(score, from=c(0,2), to=c(0,1)), 2)) %>% 
  filter(rating_type==policy_type)





## TABLE A4 ------------------------------------------------

(tbl_df <- df %>% 
   ungroup() %>%
   summarise(age = median(age, na.rm = TRUE),
             man = mean(man, na.rm = TRUE),
             bachelors = mean(educ >= 4, na.rm = TRUE)) %>% 
   t() %>%
   round(2) %>%
   as.data.frame())

# Categorical 
prop.table(table(df$region))
prop.table(table(df$party_id))
prop.table(table(df$race))
prop.table(table(df$age_cat))

## APPENDIX B4 ----------------------------------------------

# Create missingness indicator
df <- df %>% 
  mutate(attrit = as.integer(apply(df %>% select(symbol1:backlash3), MARGIN = 1,
                                   FUN = function(x) {sum(is.na(x))}) > 1))

# Estimate attrition models 
mod1 <- lm_robust(attrit ~ treated + age_z + region + man + race + race_id_import_z + 
                    left_right_z + pol_interest_z + pol_know_z + 
                    party_id + usa_pride_z + rr_z + educ_z + hh_income_z,
                  data = df,
                  se_type = "HC2")
mod2 <- lm_robust(attrit ~ treated*age_z + treated*region + treated*man + treated*race + 
                    treated*race_id_import_z + treated*left_right_z + treated*pol_interest_z + 
                    treated*pol_know_z + treated*party_id + treated*usa_pride_z + 
                    treated*rr_z + treated*educ_z + treated*hh_income_z,
                  data = df,
                  se_type = "HC2")

# Report p-values 
mod1$p.value["treated"]
lmtest::waldtest(mod1, mod2)

## FIGURE A1 --------------------------------------------------

# Alphas 
psych::alpha(df %>%
               filter(treated==0) %>%
               ungroup() %>%
               select(symbol1, symbol2, symbol3) %>%
               na.omit())
psych::alpha(df %>%
               filter(treated==0) %>%
               ungroup() %>%
               select(redist1, redist2, redist3) %>%
               na.omit())
psych::alpha(df %>%
               filter(treated==0) %>%
               ungroup() %>%
               select(backlash1, backlash2, backlash3) %>%
               na.omit())

# Correlation matrix 
df_long <- df %>% 
  filter(treated==0) %>%
  pivot_longer(cols = c(symbol1, symbol2, symbol3,
                        redist1, redist2, redist3, 
                        backlash1, backlash2, backlash3),
               names_to = "var_x", values_to = "x") %>%
  select(ResponseId, var_x, x) %>%
  # Rename policies
  mutate(var_x = case_when(var_x=="symbol1" ~ "Build Lynching\nMemorial\n(Symbolic)",
                           var_x=="symbol2" ~ "Celebrate\nJuneteenth\n(Symbolic)",
                           var_x=="symbol3" ~ "Grants for African\nAmerican Museum\n(Symbolic)",
                           var_x=="redist1" ~ "Expand\nDEI Hiring\n(Redistributive)",
                           var_x=="redist2" ~ "Anti-Segregation\nZoning\n(Redistributive)",
                           var_x=="redist3" ~ "Reallocate\nPolice Budget\n(Redistributive)",
                           var_x=="backlash1" ~ "Ban Confederate\nStatue Removals\n(Backlash)",
                           var_x=="backlash2" ~ "Allow Housing\nDiscrimination\n(Backlash)",
                           var_x=="backlash3" ~ "Increase Welfare\nRequirements\n(Backlash)"))
  
df_long <- df_long %>%
  # Join to itself to get pairs 
  inner_join(df_long %>% rename(var_y = var_x, y = x),
             by = "ResponseId") %>%
  # Drop matches to same variable
  filter(var_x != var_y) %>%
  filter(!is.na(x) & !is.na(y))

# Keep only lower triangle of cor matrix
var_order <- c(
  "Build Lynching\nMemorial\n(Symbolic)",
  "Celebrate\nJuneteenth\n(Symbolic)",
  "Grants for African\nAmerican Museum\n(Symbolic)",
  "Expand\nDEI Hiring\n(Redistributive)",
  "Anti-Segregation\nZoning\n(Redistributive)",
  "Reallocate\nPolice Budget\n(Redistributive)",
  "Ban Confederate\nStatue Removals\n(Backlash)",
  "Allow Housing\nDiscrimination\n(Backlash)",
  "Increase Welfare\nRequirements\n(Backlash)"
)

df_long <- df_long %>%
  mutate(
    var_x = factor(var_x, levels = var_order),
    var_y = factor(var_y, levels = var_order)
  ) %>%
  filter(as.numeric(var_x) > as.numeric(var_y))

# Compute correlations 
cors <- df_long %>%
  group_by(var_x, var_y) %>%
  summarise(r = cor(x, y))


plot_df <- df_long %>%
  # Summarize pairwise relationships 
  group_by(var_x, var_y, x, y) %>%
  summarise(n=n())

ggplot(df_long, aes(x, y)) +
  facet_grid(var_y ~ var_x) +
  geom_point(data=plot_df, aes(size=n), 
             alpha = 0.2) +
  geom_smooth(method = "loess", se = FALSE, color = "red") + 
  geom_text(
    data = cors,
    aes(x = -Inf, y = Inf, label = paste0("r = ", round(r, 2))),
    hjust = -0.1, vjust = 1.1, inherit.aes = FALSE
  ) + 
  labs(x="\nSupport for policy\nin column axis",
       y="Support for policy\nin row axis") + 
  guides(size="none") + 
  theme(axis.text = element_blank(), 
        axis.title.y = element_text(size=12, angle=0, vjust=0.5),
        axis.title.x = element_text(size=12),
        strip.text.x = element_text(size=7),
        strip.text.y = element_text(size=7, angle=0, vjust=0.5)) + 
  scale_size_continuous(range = c(0.25, 2))



## FIGURE A3  --------------------------------------------------

# Wald test that all coefficients are equal to zero: 
mod1 <- lm_robust(treated ~ 1,
                  data = df %>%
                    select(treated, age_z, region, man, race, race_id_import_z,
                           left_right_z, pol_interest_z, pol_know_z,
                           party_id, usa_pride_z, rr_z, educ_z, hh_income_z) %>%
                    na.omit(),
                  se_type = "HC2")
mod2 <- lm_robust(treated ~ age_z + region + man + race + race_id_import_z + 
                    left_right_z + pol_interest_z + pol_know_z + 
                    party_id + usa_pride_z + rr_z + educ_z + hh_income_z,
                  data = df,
                  se_type = "HC2")

lmtest::waldtest(mod1, mod2)

# Plot coefficient estimates 

# Pull model estimates
coef_est <- mod2$coefficients[2:length(mod2$coefficients)]
coef_se <- mod2$std.error[2:length(mod2$std.error)]
coef_names <- c("Age", "Region: Northeast", "Region: South", "Region: West",
                "Man", "Race: Other", "Race: White", 
                "Racial ID Importance", "Left-right placement", "Political interest", 
                "Political knowledge", "Party ID: Independent", "Party ID: Republican",
                "American pride", "Racial resentment", "Education", "HH Income")

# Dummy coef: 
dummy_names <- c("Region: Midwest", "Race: Black", "Party ID: Democrat")
dummy_est <- rep(0, length(dummy_names)) 
dummy_se <- rep(0.00001, length(dummy_names)) 

coef_col <- c(rep(0, length(coef_est)), rep(1, length(dummy_est)))

coef_est <- c(coef_est, dummy_est)
coef_names <- c(coef_names, dummy_names)
coef_se <- c(coef_se, dummy_se)


ggplot() + 
  geom_vline(xintercept = 0, lty="dashed", col="grey") + 
  geom_point(aes(x=coef_est,
                 y=coef_names, col=factor(coef_col)),
             size=2) + 
  geom_segment(aes(x=coef_est - 1.96*coef_se,
                   xend=coef_est + 1.96*coef_se,
                   y=coef_names,
                   yend=coef_names), size=0.75) + 
  geom_segment(aes(x=coef_est - 1.64*coef_se,
                   xend=coef_est + 1.64*coef_se,
                   y=coef_names,
                   yend=coef_names), size=1.25) + 
  labs(x="Coefficient estimate", y="") + 
  scale_color_manual(values=c("black", "grey")) + 
  guides(col="none") + 
  scale_y_discrete(limits=rev)


## APPENDIX D2  --------------------------------------------------

# Self-reported surprise
prop.table(table(df$treatment_surprise[df$treated==1], useNA = "always"))

# Time on treatment (in seconds) 
summary(df$treatment_time)

### FIGURE A4 -------------------
plot_df <- df %>%
  group_by(treated, est_removals) %>%
  summarise(n=n()) %>%
  group_by(treated) %>%
  mutate(pct = n/sum(n),
         est_removals = ifelse(is.na(est_removals), "No response", est_removals),
         est_removals = factor(est_removals, levels=c("50", "100", "200", "300", 
                                                      "400", "500", "No response"),
                               ordered = TRUE))

ggplot(plot_df) + 
  geom_bar(aes(est_removals, pct, fill=factor(treated)), stat = "identity", 
           position = position_dodge()) + 
  labs(x="\nEstimated number\nof statue removals",
       y="Percentage\nof respondents",
       fill="") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_fill_manual(values=c("gray70", "gray20"),
                    labels=c("Control", "Treated")) + 
  theme(legend.position.inside = c(0.8, 0.8), 
        axis.title.y = element_text(angle=0, vjust=0.5))


## TABLE A5 --------------------------------------------------

# Define covariate conditioning set 
cov_set <- "treated + age_z + region + man + race*race_id_import_z + 
                    left_right_z + pol_interest_z + pol_know_z + 
                    party_id + usa_pride_z + rr_z + educ_z + hh_income_z"

mod1 <- lm_robust(symbol_idx_z ~ treated,
                  data = exp_df,
                  se_type = "HC2")
mod1b <- lm(symbol_idx_z ~ treated, data=exp_df)
mod2 <- lm_robust(as.formula(paste("symbol_idx_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod2b <- lm(as.formula(paste("symbol_idx_z", cov_set, sep="~")), 
            data=exp_df)

mod3 <- lm_robust(redist_idx_z ~ treated,
                  data = exp_df,
                  se_type = "HC2")
mod3b <- lm(redist_idx_z ~ treated, data=exp_df)
mod4 <- lm_robust(as.formula(paste("redist_idx_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod4b <- lm(as.formula(paste("redist_idx_z", cov_set, sep="~")), 
            data=exp_df)

mod5 <- lm_robust(backlash_idx_z ~ treated,
                  data = exp_df,
                  se_type = "HC2")
mod5b <- lm(backlash_idx_z ~ treated, data=exp_df)
mod6 <- lm_robust(as.formula(paste("backlash_idx_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod6b <- lm(as.formula(paste("backlash_idx_z", cov_set, sep="~")), 
            data=exp_df)

stargazer(mod1b, mod2b, mod3b, mod4b, mod5b, mod6b, 
          se=c(starprep(mod1), starprep(mod2), starprep(mod3),
               starprep(mod4), starprep(mod5), starprep(mod6)),
          type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))

## TABLE A6 --------------------------------------------
mod <- multinom(policy_pref_forced ~ treated + age_z + region + man + race * race_id_import_z + 
                  left_right_z + pol_interest_z + pol_know_z + party_id + usa_pride_z + 
                  rr_z + educ_z + hh_income_z, 
                data = exp_df)

stargazer(mod, 
            type="text", 
          keep="treated") 

## TABLE A7 --------------------------------------------

# Define covariate conditioning set 
cov_set <- "treated + age_z + region + man + race*race_id_import_z + 
                    left_right_z + pol_interest_z + pol_know_z + 
                    party_id + usa_pride_z + rr_z + educ_z + hh_income_z"

mod1 <- lm_robust(as.formula(paste("symbol1_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod1b <- lm(as.formula(paste("symbol1_z", cov_set, sep="~")), 
            data=exp_df)
mod2 <- lm_robust(as.formula(paste("symbol2_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod2b <- lm(as.formula(paste("symbol2_z", cov_set, sep="~")), 
            data=exp_df)
mod3 <- lm_robust(as.formula(paste("symbol3_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod3b <- lm(as.formula(paste("symbol3_z", cov_set, sep="~")), 
            data=exp_df)

# Redistributive
mod4 <- lm_robust(as.formula(paste("redist1_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod4b <- lm(as.formula(paste("redist1_z", cov_set, sep="~")), 
            data=exp_df)
mod5 <- lm_robust(as.formula(paste("redist2_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod5b <- lm(as.formula(paste("redist2_z", cov_set, sep="~")), 
            data=exp_df)
mod6 <- lm_robust(as.formula(paste("redist3_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod6b <- lm(as.formula(paste("redist3_z", cov_set, sep="~")), 
            data=exp_df)

# Backlash
mod7 <- lm_robust(as.formula(paste("backlash1_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod7b <- lm(as.formula(paste("backlash1_z", cov_set, sep="~")), 
            data=exp_df)
mod8 <- lm_robust(as.formula(paste("backlash2_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod8b <- lm(as.formula(paste("backlash2_z", cov_set, sep="~")), 
            data=exp_df)
mod9 <- lm_robust(as.formula(paste("backlash3_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod9b <- lm(as.formula(paste("backlash3_z", cov_set, sep="~")), 
            data=exp_df)


stargazer(mod1b, mod2b, mod3b, 
          se=c(starprep(mod1), starprep(mod2), starprep(mod3)),
          #      type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))

stargazer(mod4b, mod5b, mod6b, 
          se=c(starprep(mod4), starprep(mod5), starprep(mod6)),
          #     type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))

stargazer(mod7b, mod8b, mod9b, 
          se=c(starprep(mod7), starprep(mod8), starprep(mod9)),
          #     type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))


## TABLE A8 --------------------------------------------

# Define covariate conditioning set 
cov_set <- "treated + age_z + region + man + race*race_id_import_z + 
                    left_right_z + pol_interest_z + pol_know_z + 
                    party_id + usa_pride_z + rr_z + educ_z + hh_income_z"

mod1 <- lm_robust(as.formula(paste("mech_priority_self_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod1b <- lm(as.formula(paste("mech_priority_self_z", cov_set, sep="~")), 
            data=exp_df)
mod2 <- lm_robust(as.formula(paste("mech_priority_others_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod2b <- lm(as.formula(paste("mech_priority_others_z", cov_set, sep="~")), 
            data=exp_df)
mod3 <- lm_robust(as.formula(paste("mech_opportunity_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod3b <- lm(as.formula(paste("mech_opportunity_z", cov_set, sep="~")), 
            data=exp_df)
mod4 <- lm_robust(as.formula(paste("mech_reputation_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod4b <- lm(as.formula(paste("mech_reputation_z", cov_set, sep="~")), 
            data=exp_df)
mod5 <- lm_robust(as.formula(paste("mech_identity_threat_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod5b <- lm(as.formula(paste("mech_identity_threat_z", cov_set, sep="~")), 
            data=exp_df)
mod6 <- lm_robust(as.formula(paste("mech_symbols_distract_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod6b <- lm(as.formula(paste("mech_symbols_distract_z", cov_set, sep="~")), 
            data=exp_df)
mod7 <- lm_robust(as.formula(paste("mech_gov_intervention_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod7b <- lm(as.formula(paste("mech_gov_intervention_z", cov_set, sep="~")), 
            data=exp_df)
mod8 <- lm_robust(as.formula(paste("mech_progress_z", cov_set, sep="~")),
                  data = exp_df,
                  se_type = "HC2")
mod8b <- lm(as.formula(paste("mech_progress_z", cov_set, sep="~")), 
            data=exp_df)


stargazer(mod1b, mod2b, mod3b, 
          se=c(starprep(mod1), starprep(mod2), starprep(mod3)),
          #      type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))

stargazer(mod4b, mod5b, mod6b, 
          se=c(starprep(mod4), starprep(mod5), starprep(mod6)),
          #     type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))

stargazer(mod7b, mod8b, 
          se=c(starprep(mod7), starprep(mod8)),
          #     type="text",
          keep.stat = c("rsq", "n"),
          keep=c("treated", "trt"))


## FIGURE A5 -------------------------------------------------

# Define covariate conditioning set 
cov_set <- "treated*(party_id + age_z + region + man + race + 
                    race_id_import_z + left_right_z + hi_pol_interest + pol_know_z + 
                    party_id + usa_pride_z + rr_cat + educ_z + hh_income_z)"

# Estimate models 
mod1 <- lm_robust(as.formula(paste("symbol_idx_z", cov_set, sep="~")),
                  data = df,
                  se_type = "HC2")
mod2 <- lm_robust(as.formula(paste("redist_idx_z", cov_set, sep="~")),
                  data = df,
                  se_type = "HC2")
mod3 <- lm_robust(as.formula(paste("backlash_idx_z", cov_set, sep="~")),
                  data = df,
                  se_type = "HC2")

# Calculate marginal effects -- Party ID
marg_fx <- rbind(summary(margins(mod1, at = list(party_id=unique(df$party_id)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Symbolic", 
                          df = mod1$df.residual),
                 summary(margins(mod2, at = list(party_id=unique(df$party_id)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Redistributive", 
                          df = mod2$df.residual),
                 summary(margins(mod3, at = list(party_id=unique(df$party_id)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Backlash", 
                          df = mod3$df.residual)
)
# Reorder factor levels
marg_fx$outcome <- factor(marg_fx$outcome,
                          levels=c("Symbolic", "Redistributive", "Backlash"),
                          ordered=TRUE)

(p1 <- ggplot(marg_fx) + 
    geom_hline(yintercept = 0, lty="dashed", col="grey") + 
    geom_point(aes(outcome, AME, col = party_id), 
               position=position_dodge(width=0.3), size=2) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.95, df)*SE,
                       ymax=AME + qt(0.95, df)*SE, col = party_id), 
                   linewidth=1.15,
                   position=position_dodge(width=0.3)) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.975, df)*SE,
                       ymax=AME + qt(0.975, df)*SE, col = party_id), 
                   linewidth=0.5,
                   position=position_dodge(width=0.3)) + 
    scale_color_manual(values = c("blue", "darkgrey", "red")) + 
    labs(x="", y="CATE\non policy\nsupport", col="Party ID") + 
    theme(axis.title.y = element_text(angle=0, vjust=0.5), 
          text = element_text(size=18),
          legend.position = "top")
)

# Calculate marginal effects -- Racial resentment 
marg_fx <- rbind(summary(margins(mod1, at = list(rr_cat=unique(df$rr_cat)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Symbolic", 
                          df = mod1$df.residual),
                 summary(margins(mod2, at = list(rr_cat=unique(df$rr_cat)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Redistributive", 
                          df = mod2$df.residual),
                 summary(margins(mod3, at = list(rr_cat=unique(df$rr_cat)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Backlash", 
                          df = mod3$df.residual)
)
# Reorder factor levels
marg_fx$outcome <- factor(marg_fx$outcome,
                          levels=c("Symbolic", "Redistributive", "Backlash"),
                          ordered=TRUE)
marg_fx$rr_cat <- factor(marg_fx$rr_cat,
                         levels=c("Low", "Medium", "High"),
                         ordered=TRUE)

(p2 <- ggplot(marg_fx) + 
    geom_hline(yintercept = 0, lty="dashed", col="grey") + 
    geom_point(aes(outcome, AME, col = rr_cat), 
               position=position_dodge(width=0.3), size=2) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.975, df)*SE,
                       ymax=AME + qt(0.975, df)*SE, col = rr_cat), 
                   size=0.75,
                   position=position_dodge(width=0.3)) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.95, df)*SE,
                       ymax=AME + qt(0.95, df)*SE, col = rr_cat), 
                   size=1.25,
                   position=position_dodge(width=0.3)) + 
    scale_color_manual(values = c("darkgreen", "orange", "red")) + 
    labs(x="", y="CATE\non policy\nsupport", col="Racial Resentment") + 
    theme(axis.title.y = element_text(angle=0, vjust=0.5), 
          text = element_text(size=18),
          legend.position = "top")
)

# Calculate marginal effects -- Race
marg_fx <- rbind(summary(margins(mod1, at = list(race=unique(df$race)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Symbolic", 
                          df = mod1$df.residual),
                 summary(margins(mod2, at = list(race=unique(df$race)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Redistributive", 
                          df = mod2$df.residual),
                 summary(margins(mod3, at = list(race=unique(df$race)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Backlash", 
                          df = mod3$df.residual)
)
# Reorder factor levels
marg_fx$outcome <- factor(marg_fx$outcome,
                          levels=c("Symbolic", "Redistributive", "Backlash"),
                          ordered=TRUE)

(p3 <- ggplot(marg_fx) + 
    geom_hline(yintercept = 0, lty="dashed", col="grey") + 
    geom_point(aes(outcome, AME, col = race), 
               position=position_dodge(width=0.3), size=2) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.975, df)*SE,
                       ymax=AME + qt(0.975, df)*SE, col = race), 
                   size=0.75,
                   position=position_dodge(width=0.3)) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.95, df)*SE,
                       ymax=AME + qt(0.95, df)*SE, col = race), 
                   size=1.25,
                   position=position_dodge(width=0.3)) + 
    scale_color_manual(values = c("orange", "darkgreen", "purple")) + 
    labs(x="", y="CATE\non policy\nsupport", col="Race") + 
    theme(axis.title.y = element_text(angle=0, vjust=0.5), 
          text = element_text(size=18),
          legend.position = "top")
)

# Calculate marginal effects -- Region
marg_fx <- rbind(summary(margins(mod1, at = list(region=unique(df$region[!is.na(df$region)])), 
                                 variables = "treated")) %>%
                   mutate(outcome="Symbolic", 
                          df = mod1$df.residual),
                 summary(margins(mod2, at = list(region=unique(df$region[!is.na(df$region)])), 
                                 variables = "treated")) %>%
                   mutate(outcome="Redistributive", 
                          df = mod2$df.residual),
                 summary(margins(mod3, at = list(region=unique(df$region[!is.na(df$region)])), 
                                 variables = "treated")) %>%
                   mutate(outcome="Backlash", 
                          df = mod3$df.residual)
)

(p1 <- ggplot(marg_fx) + 
    geom_hline(yintercept = 0, lty="dashed", col="grey") + 
    geom_point(aes(outcome, AME, col = region), 
               position=position_dodge(width=0.3), size=2) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.95, df)*SE,
                       ymax=AME + qt(0.95, df)*SE, col = region), 
                   linewidth=1.15,
                   position=position_dodge(width=0.3)) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.975, df)*SE,
                       ymax=AME + qt(0.975, df)*SE, col = region), 
                   linewidth=0.5,
                   position=position_dodge(width=0.3)) + 
    scale_color_manual(values = c("blue", "darkgreen", "Orange", "red")) + 
    labs(x="", y="CATE\non policy\nsupport", col="Region") + 
    theme(axis.title.y = element_text(angle=0, vjust=0.5), 
          text = element_text(size=18),
          legend.position = "top")
)

# Calculate marginal effects -- Political interest
marg_fx <- rbind(summary(margins(mod1, at = list(hi_pol_interest=c(0,1)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Symbolic", 
                          df = mod1$df.residual),
                 summary(margins(mod2, at = list(hi_pol_interest=c(0,1)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Redistributive", 
                          df = mod2$df.residual),
                 summary(margins(mod3, at = list(hi_pol_interest=c(0,1)), 
                                 variables = "treated")) %>%
                   mutate(outcome="Backlash", 
                          df = mod3$df.residual)
)

(p2 <- ggplot(marg_fx) + 
    geom_hline(yintercept = 0, lty="dashed", col="grey") + 
    geom_point(aes(outcome, AME, col = factor(hi_pol_interest)), 
               position=position_dodge(width=0.3), size=2) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.975, df)*SE,
                       ymax=AME + qt(0.975, df)*SE, col = factor(hi_pol_interest)), 
                   size=0.75,
                   position=position_dodge(width=0.3)) + 
    geom_linerange(aes(x=outcome, 
                       ymin=AME - qt(0.95, df)*SE,
                       ymax=AME + qt(0.95, df)*SE, col = factor(hi_pol_interest)), 
                   size=1.25,
                   position=position_dodge(width=0.3)) + 
    scale_color_manual(values = c("darkgreen", "blue"),
                       labels = c("Below median", "Above or equal to median")) + 
    labs(x="", y="CATE\non policy\nsupport", col="Political Interest") + 
    theme(axis.title.y = element_text(angle=0, vjust=0.5), 
          text = element_text(size=18),
          legend.position = "top")
)

## TABLE A9 -----------------------------------------------

hc <- read.csv("./raw_data/openended_handcode.csv") %>% 
  mutate(code1 = substr(code1, 1,2),
         code2 = substr(code2, 1,2),
         code3 = substr(code3, 1,2))

# Summarize tier 1
plot_df <- hc %>% 
  mutate(code1 = substr(code1, 1,1),
         code2 = substr(code2, 1,1),
         code3 = substr(code3, 1,1)) %>%
  pivot_longer(cols = starts_with("code"),
               names_to = "code_num",
               values_to = "code",
               values_drop_na = TRUE) %>%
  group_by(code) %>%
  summarise(rows_with_code = n_distinct(row_number())) %>%
  mutate(percent_rows = rows_with_code / nrow(hc) * 100)

# Summarize tier 2 
plot_df <- hc %>%
  pivot_longer(cols = starts_with("code"),
               names_to = "code_num",
               values_to = "code",
               values_drop_na = TRUE) %>%
  group_by(code) %>%
  summarise(rows_with_code = n_distinct(row_number())) %>%
  mutate(percent_rows = rows_with_code / nrow(hc) * 100)


